# Lerner, Michael. "Local power: Understanding the adoption and design of county wind energy regulation." Review of Policy Research. Forthcoming.
#
# Data preparation file
#
# Contents -------------------------------------
# 1. County setback variables
# 2. Neighboring county setback variables
# 3. Neighboring county wind farm variables
# 4. Years of wind viability
# 5. Covariates
# 6. State indicator
# 7. Scale and save
#
library(tidyverse)
library(haven)
library(readxl)
library(pbapply)
library(units)
library(maps)
library(sf)
library(spdep)

setwd("~/Google Drive/My Drive/Research/wind_reg/Data/")

# 1. County setback variables -------------------------------------
# Create variables for setback_detail, setback_numeric, setback, and setback_source

# Load setback dataset for 3,105 counties
setbacks_all <- read_csv("Setbacks/county_setbacks_June19.csv")

# Create text setback variable
setbacks_all$setback_detail <- NA
setbacks_all$setback_detail[which(setbacks_all$has_website==0)] <- "no_website"
setbacks_all$setback_detail[which(setbacks_all$setback_400ft=="*")] <- "not_specified"
setbacks_all$setback_detail[which(setbacks_all$ban==1)] <- "ban"
setbacks_all$setback_detail[which(is.na(setbacks_all$setback_400ft) & is.na(setbacks_all$ban) & setbacks_all$has_website==1)] <- "no_ordinance"
setbacks_all$setback_detail[which(is.na(as.numeric(setbacks_all$setback_400ft))==F)] <- setbacks_all$setback_400ft[which(is.na(as.numeric(setbacks_all$setback_400ft))==F)]

# Create numeric setback variable
setbacks_all$setback_numeric <- setbacks_all$setback_detail
setbacks_all$setback_numeric[which(setbacks_all$setback_numeric=="not_specified")] <- 0
setbacks_all$setback_numeric[which(setbacks_all$setback_numeric=="no_website")] <- NA
setbacks_all$setback_numeric[which(setbacks_all$setback_numeric=="ban")] <- 5280
setbacks_all$setback_numeric[which(setbacks_all$setback_numeric %in% c("not_specified","no_ordinance"))] <- 0
setbacks_all$setback_numeric <- as.numeric(setbacks_all$setback_numeric)

# Create ordinal measure of setback
# [none (no ordinance or not specified) / low (1-399ft) / medium (400-600ft) / high (601-1319ft) / ban (1320ft+ or ban)]
setbacks_all$setback <- ifelse(setbacks_all$setback_numeric==0,"none",
                           ifelse(setbacks_all$setback_numeric<400,"low",
                                  ifelse(setbacks_all$setback_numeric<601,"medium",
                                         ifelse(setbacks_all$setback_numeric<1320,"high","ban"))))

setbacks_all$setback <- as.numeric(setbacks_all$setback_detail)

setbacks_all$setback <- ifelse(setbacks_all$has_website==1,"none",NA)
setbacks_all$setback[which(as.numeric(setbacks_all$setback_400ft)<400)] <- "low"
setbacks_all$setback[which(as.numeric(setbacks_all$setback_400ft) %in% 400:600)] <- "medium"
setbacks_all$setback[which(as.numeric(setbacks_all$setback_400ft) %in% 601:1319)] <- "high"
setbacks_all$setback[which(as.numeric(setbacks_all$setback_400ft)>=1320)] <- "ban"
setbacks_all$setback[which(setbacks_all$ban==1)] <- "ban"

setbacks_all$setback <- ordered(setbacks_all$setback,levels = c("none","low","medium","high","ban"))

# Reformat source column to be either the code compiler or the URL to the zoning document
setbacks_all$setback_source <- ifelse(setbacks_all$source %in% c("americanlegal","codebook.com","ecode360","municode","qualitycode","sterlingcodifiers"),setbacks_all$source,setbacks_all$website_code)
setbacks_all$setback_source[which(setbacks_all$setback_detail=="no_website")] <- NA

# Retain FIPS, polyname, county, state, setback, setback_detail, setback_numeric, setback_source
# Subset to the 23 states with county control over commercial wind farm siting
setbacks <- setbacks_all %>%
  select(fips, polyname, county, state, setback, setback_detail, setback_numeric, setback_source) %>%
  filter(state %in% c("alabama","arizona", "arkansas", 
                      "california","florida","georgia", 
                      "hawaii","idaho","illinois",
                      "indiana","iowa","kansas",
                      "louisiana","maryland", "mississippi",
                      "missouri", "montana","nebraska", 
                      "nevada","north carolina","utah",
                      "virginia", "washington"))

# 2. Neighboring county setback variables-------------------------------------
# Create variable for nb_setback_existence, nb_setback_existence_p and nb_setback_stringency

# Load county shapefile and create fips variable
county <- st_read("Census Data/cb_2018_us_county_20m/",layer = "cb_2018_us_county_20m")
county$fips <- as.numeric(paste0(county$STATEFP,county$COUNTYFP))

# Create matrix of neighbors
row.names(county) <- county$fips
nb <- poly2nb(county)
nb_mat <- nb2mat(nb, style = "B",zero.policy = T)
colnames(nb_mat) <- rownames(nb_mat)

# For island counties (San Juan County, WA and Hawaii counties), use three nearest counties (reciprocal)
hawaii_dists <- st_distance((county[which(county$fips %in% 15000:15999),]),(county[which(county$fips %in% 15000:15999),]))
was_dists <- st_distance((county[which(county$fips %in% 53000:53999),]),(county[which(county$fips %in% 53000:53999),]))

nb_hawaii <- apply(hawaii_dists,2,function(x){
  nb.idx <- which(rank(x,ties.method = "min")<=3)
  county$fips[which(county$fips %in% 15000:15999)][nb.idx]
})

for(i in 1:ncol(nb_hawaii)){
  nb_mat[which(colnames(nb_mat) %in% 15000:15999)[i],which(rownames(nb_mat) %in% nb_hawaii[,i])] <- 1
  nb_mat[which(rownames(nb_mat) %in% nb_hawaii[,i]),which(colnames(nb_mat) %in% 15000:15999)[i]] <- 1
}

nb_was <- apply(was_dists,2,function(x){
  nb.idx <- which(rank(x,ties.method = "min")<=3)
  county$fips[which(county$fips %in% 53000:53999)][nb.idx]
})

nb_sj <- nb_was[[which(county$fips[which(county$fips %in% 53000:53999)] == "53055")]]

nb_mat[which(colnames(nb_mat) =="53055"),which(rownames(nb_mat) %in% nb_sj)] <- 1
nb_mat[which(rownames(nb_mat) %in% nb_sj),which(colnames(nb_mat) =="53055")] <- 1

diag(nb_mat) <- 0

# For each county, if at least one neighbor has a non-zero setback, mark as 1. Otherwise, 0
setbacks$nb_setback_existence <- unlist(pbsapply(setbacks$fips,function(x){
  nb_setbacks <- setbacks_all[which(as.numeric(setbacks_all$fips) %in% names(which(nb_mat[,which(colnames(nb_mat)==x)]==1))),'setback']
  ifelse(any(which(as.numeric(unlist(nb_setbacks))>1)),1,0)
}))

# For each county, the proportion of a county's neighbors with a setback
setbacks$nb_setback_existence_p <- unlist(pbsapply(setbacks$fips,function(x){
  nb_setbacks <- setbacks_all[which(as.numeric(setbacks_all$fips) %in% names(which(nb_mat[,which(colnames(nb_mat)==x)]==1))),'setback']
  nb_setbacks <- ifelse(as.numeric(unlist(nb_setbacks))>1,1,0)
  sum(nb_setbacks,na.rm = T)/length(which(is.na(nb_setbacks)==F))
}))

# For each county, the median stringency of a county's neighbors with a setback
# [none (no ordinance or not specified) / low (1-399ft) / medium (400-600ft) / high (601-1319ft) / ban (1320ft+ or ban)]
setbacks$nb_setback_stringency_median <- unlist(pbsapply(setbacks$fips,function(x){
  nb_setbacks <- setbacks_all[which(as.numeric(setbacks_all$fips) %in% names(which(nb_mat[,which(colnames(nb_mat)==x)]==1))),'setback']
  round(median(as.numeric(nb_setbacks$setback),na.rm = T))
}))

setbacks$nb_setback_stringency_median <- ordered(setbacks$nb_setback_stringency_median,
                                                 labels = c("none","low","medium","high","ban"),
                                                 levels = 1:5)

# For each county, the maximum stringency of a county's neighbors with a setback
# [none (no ordinance or not specified) / low (1-399ft) / medium (400-600ft) / high (601-1319ft) / ban (1320ft+ or ban)]
setbacks$nb_setback_stringency_max <- unlist(pbsapply(setbacks$fips,function(x){
  nb_setbacks <- setbacks_all[which(as.numeric(setbacks_all$fips) %in% names(which(nb_mat[,which(colnames(nb_mat)==x)]==1))),'setback']
  max(as.numeric(nb_setbacks$setback),na.rm = T)
}))

setbacks$nb_setback_stringency_max <- ordered(setbacks$nb_setback_stringency_max,
                                              labels = c("none","low","medium","high","ban"),
                                              levels = 1:5)

# 3. Neighboring county wind farm variables -------------------------------------
# Load US Wind Turbine Database
fedw <- st_read("Wind Farms/uswtdbSHP/",layer = "uswtdb_v3_0_1_20200514")

# Calculate number of projects and total capacity per county
farms <- fedw %>% filter(t_cap >=0) %>% group_by(t_fips) %>% summarize(
  farm_count = length(unique(p_name)),
  farm_capacity = sum(t_cap)/1000) 

farms$t_fips <- as.numeric(as.character(farms$t_fips))

setbacks$nb_wind_farm_existence <- pbsapply(setbacks$fips, function(x){
  nb_fips <- names(which(nb_mat[,which(colnames(nb_mat)==x)]==1))
  if(sum(farms$t_fips %in% nb_fips)>0){
    return(1)
  } else {
    return(0)
  }
})

setbacks$nb_wind_farm_existence_p <- pbsapply(setbacks$fips, function(x){
  nb_fips <- names(which(nb_mat[,which(colnames(nb_mat)==x)]==1))
  sum(farms$t_fips %in% nb_fips)/length(nb_fips)
})

# nb_wind_farm_capacity -------------------------------------
# [logged MW, sum of all contiguous neighbors]
setbacks$nb_wind_farm_capacity <- pbsapply(setbacks$fips, function(x){
  nb_fips <- names(which(nb_mat[,which(colnames(nb_mat)==x)]==1))
  if(sum(farms$t_fips %in% nb_fips)>0){
    return(sum(farms$farm_capacity[which(farms$t_fips %in% nb_fips)]))
  } else {
    return(0)
  }
})

setbacks$nb_wind_farm_capacity <- log(setbacks$nb_wind_farm_capacity+1)

# nb_wind_farm_count -------------------------------------
# [log sum of all contiguous neighbors]
setbacks$nb_wind_farm_count <- pbsapply(setbacks$fips, function(x){
  nb_fips <-  names(which(nb_mat[,which(colnames(nb_mat)==x)]==1))
  sum(farms$farm_count[farms$t_fips %in% nb_fips])
})

setbacks$nb_wind_farm_count <- log(setbacks$nb_wind_farm_count+1)

# 4. Years of wind viability -------------------------------------
# [0 years, 1-5 years, 6-10 years, >10 years]

# Subset county shapefile to relevant counties
county_subset <- county %>% filter(fips %in% setbacks$fips) %>% dplyr::select(GEOID)
colnames(county_subset)[1] <- c("fips")
county_subset$fips <- as.numeric(as.character(county_subset$fips))
setbacks <- left_join(county_subset,setbacks)

# Load wind potential for 2008, 2014, and "near-future" technology (80-m turbines)
# Transform CRS and interpolate for each county by summing area of potential land
setbacks$land_area <- set_units(st_area(setbacks),"km^2")

wp08 <- st_read("Wind Potential/2008","pot_wind_cap_080_2008")
wp08 <- st_transform(wp08,crs = st_crs(setbacks))
wp08 <- st_interpolate_aw(wp08 %>% select(-state,-region),to = setbacks[-which(setbacks$state=="hawaii"),],extensive = T)
setbacks$wp08_km2 <- NA
setbacks$wp08_km2[-which(setbacks$state=="hawaii")] <- wp08$a30

wp14 <- st_read("Wind Potential/2014","pot_wind_cap_080_current")
wp14 <- st_transform(wp14,crs = st_crs(setbacks))
wp14 <- st_interpolate_aw(wp14 %>% select(-state,-region),to = setbacks[-which(setbacks$state=="hawaii"),],extensive = T)
setbacks$wp14_km2 <- NA
setbacks$wp14_km2[-which(setbacks$state=="hawaii")] <- wp14$a30

wpnf <- st_read("Wind Potential/nearfuture","pot_wind_cap_080_near_future")
wpnf <- st_transform(wpnf,crs = st_crs(setbacks))
wpnf <- st_interpolate_aw(wpnf %>% select(-state,-region),to = setbacks[-which(setbacks$state=="hawaii"),],extensive = T)
setbacks$wpnf_km2 <- NA
setbacks$wpnf_km2[-which(setbacks$state=="hawaii")] <- wpnf$a30

# Determine duration of time county has had sufficient wind potential for utility-scale project
# How much land does a 100MW, 80m hub height project need? Around 40 km^2. Assuming that 1/4 of landowners say yes, use 160 km^2.
setbacks$wp08_viable <- ifelse(setbacks$wp08_km2 >= 160,1,0)
setbacks$wp14_viable <- ifelse(setbacks$wp14_km2 >= 160,1,0)
setbacks$wpnf_viable <- ifelse(setbacks$wpnf_km2 >= 160,1,0)

setbacks$years_viable <- NA
for(i in 1:nrow(setbacks)){
  if(setbacks$state[i]!="hawaii"){
    if(setbacks$wpnf_viable[i] == 0){
      setbacks$years_viable[i] <- "0 years"
    } else if (setbacks$wpnf_viable[i] == 1 & setbacks$wp14_viable[i] == 0){
      setbacks$years_viable[i] <- "1-5 years"
    } else if (setbacks$wpnf_viable[i] == 1 & setbacks$wp14_viable[i] == 1 & setbacks$wp08_viable[i] == 0){
      setbacks$years_viable[i] <- "6-10 years"
    } else {
      setbacks$years_viable[i] <- ">10 years"
    }
  }
}

setbacks$years_viable <- ordered(setbacks$years_viable,levels = c("0 years", "1-5 years", "6-10 years", ">10 years"))

setbacks$years_viable_binary <- ifelse(setbacks$years_viable==">10 years",">10 years","0-10years")

# Robustness check: 1/8 landowners and 1/2 landowners say yes
setbacks$wp08_viable_lowaccept <- ifelse(setbacks$wp08_km2 >= 320,1,0)
setbacks$wp14_viable_lowaccept <- ifelse(setbacks$wp14_km2 >= 320,1,0)
setbacks$wpnf_viable_lowaccept <- ifelse(setbacks$wpnf_km2 >= 320,1,0)

setbacks$years_viable_lowaccept <- NA
for(i in 1:nrow(setbacks)){
  if(setbacks$state[i]!="hawaii"){
    if(setbacks$wpnf_viable_lowaccept[i] == 0){
      setbacks$years_viable_lowaccept[i] <- "0 years"
    } else if (setbacks$wpnf_viable_lowaccept[i] == 1 & setbacks$wp14_viable_lowaccept[i] == 0){
      setbacks$years_viable_lowaccept[i] <- "1-5 years"
    } else if (setbacks$wpnf_viable_lowaccept[i] == 1 & setbacks$wp14_viable_lowaccept[i] == 1 & setbacks$wp08_viable_lowaccept[i] == 0){
      setbacks$years_viable_lowaccept[i] <- "6-10 years"
    } else {
      setbacks$years_viable_lowaccept[i] <- ">10 years"
    }
  }
}

setbacks$years_viable_lowaccept <- ordered(setbacks$years_viable_lowaccept,levels = c("0 years", "1-5 years", "6-10 years", ">10 years"))

setbacks$wp08_viable_highaccept <- ifelse(setbacks$wp08_km2 >= 80,1,0)
setbacks$wp14_viable_highaccept <- ifelse(setbacks$wp14_km2 >= 80,1,0)
setbacks$wpnf_viable_highaccept <- ifelse(setbacks$wpnf_km2 >= 80,1,0)

setbacks$years_viable_highaccept <- NA
for(i in 1:nrow(setbacks)){
  if(setbacks$state[i]!="hawaii"){
    if(setbacks$wpnf_viable_highaccept[i] == 0){
      setbacks$years_viable_highaccept[i] <- "0 years"
    } else if (setbacks$wpnf_viable_highaccept[i] == 1 & setbacks$wp14_viable_highaccept[i] == 0){
      setbacks$years_viable_highaccept[i] <- "1-5 years"
    } else if (setbacks$wpnf_viable_highaccept[i] == 1 & setbacks$wp14_viable_highaccept[i] == 1 & setbacks$wp08_viable_highaccept[i] == 0){
      setbacks$years_viable_highaccept[i] <- "6-10 years"
    } else {
      setbacks$years_viable_highaccept[i] <- ">10 years"
    }
  }
}

setbacks$years_viable_highaccept <- ordered(setbacks$years_viable_highaccept,levels = c("0 years", "1-5 years", "6-10 years", ">10 years"))

# 5. Covariates -------------------------------------
## 5.1 Population density -------------------------------------
# [logged people/km^2]
population <- read_csv("Census Data/co-est2019-alldata.csv")
population$fips <- paste0(population$STATE,population$COUNTY)
population <- population %>% select(fips, POPESTIMATE2019)
colnames(population) <- c("fips","population")
population$fips <- as.numeric(population$fips)

setbacks <- left_join(setbacks,population)

setbacks$population_density <- setbacks$population/setbacks$land_area
setbacks$population_density <- log(setbacks$population_density)

## 5.2 County GDP per capita -------------------------------------
# [logged thousands of 2012 USD/cap]
county_gdp <- read_csv("County GDP/county_gdp_2019.csv",skip = 4)
colnames(county_gdp) <- c("fips","polyname","county_gdp")
county_gdp <- county_gdp %>% na.omit(county_gdp)
county_gdp$fips <- as.numeric(county_gdp$fips)

# For counties that have been combined with other counties, apportion GDP by population
idx <- which(setbacks$fips %in% county_gdp$fips==F)

while(length(idx)>0){
  i <- idx[1]
  # Name of missing county
  name <- str_split(setbacks$polyname[i],pattern = ",",simplify = T)[,2]
  # Index of combined county entry
  match <- grep(tolower(county_gdp$polyname),pattern = name)
  if(length(match)>1){
    match <- match[str_extract(county_gdp$fips[match],pattern = "^[0-9][0-9]")==str_extract(setbacks$fips[i],pattern = "^[0-9][0-9]")]
  }
  if(length(match)>1){
    match <- match[which(nchar(county_gdp$fips[match])==nchar(setbacks$fips[i]))]
  }
  if(length(match)>1){
    if(name=="roanoke"){
      # For Roanoke, VA, combine entries for Roanoke and Salem under FIPS for roanoke
      county_gdp$county_gdp[match[1]] <- as.character(sum(as.numeric(county_gdp$county_gdp[match])))
      county_gdp$fips[match[1]] <- setbacks$fips[i]
      county_gdp <- county_gdp[-match[-1],]
      # Also combine population for Roanoke and Salem
      setbacks$population[i] <- sum(population$population[which(population$fips %in% county$fips[which(county$STATEFP==51 & county$NAME %in% c("Roanoke","Salem"))])])
      # Recalculate population density
      setbacks$population_density[i] <- setbacks$population[i]/setbacks$land_area[i]
    } else if (name == "frederick"){
      # For Frederick, VA, assign its FIPS to the FIPS for the whole county
      county_gdp$fips[match[1]] <- setbacks$fips[i]
      # Also combine population for Frederick and Winchester
      setbacks$population[i] <- sum(population$population[which(population$fips %in% county$fips[which(county$STATEFP==51 & county$NAME %in% c("Frederick","Winchester"))])])
      # Recalculate population density
      setbacks$population_density[i] <- setbacks$population[i]/setbacks$land_area[i]
    } else {
      print(paste0("error ",i))
    }
  } else {
    # Adjust names
    if(str_extract(county_gdp$fips[match],pattern = "^[0-9][0-9]")==str_extract(setbacks$fips[i],pattern = "^[0-9][0-9]")){
      name <- str_split(county_gdp$polyname[match],pattern = ",",simplify = T)[,1]
      name <- tolower(unlist(str_split(name,pattern = " [+] ")))
      name <- unique(paste0(setbacks$state[i],",",name))
    }
    name <- name[which(name %in% setbacks$polyname)]
    
    # Apportion GDP by population
    temp <- county_gdp[1:length(name),]
    temp$fips <- setbacks$fips[which(setbacks$polyname %in% name)]
    temp$polyname <- NA
    pops <- setbacks[which(setbacks$polyname %in% name),c('fips','population')]
    pops <- pops %>% arrange(fips)
    temp <- temp %>% arrange(fips)
    temp$county_gdp <- as.numeric(county_gdp$county_gdp[match])*pops$population/sum(pops$population)
    # Replace combined county entry with separate rows
    temp$county_gdp <- as.character(temp$county_gdp)
    fips_to_remove = c(county_gdp$fips[match],temp$fips)
    county_gdp <- bind_rows(county_gdp %>% filter(fips %in% fips_to_remove==F),temp)
  }
  idx <- which(setbacks$fips %in% county_gdp$fips==F)
}

county_gdp$county_gdp <- log(as.numeric(county_gdp$county_gdp))
county_gdp <- county_gdp[,-2]

setbacks <- left_join(setbacks,county_gdp)

## 5.3 Percentage of county GDP from mining, quarrying, and oil and gas extraction sectors per capita -------------------------------------
# [logged thousands of 2012 USD from sectors in the mining, quarrying, and oil and gas extraction sectors]
extraction_gdp <- read_csv("Extraction GDP/bea_2019_miningquarryingextraction_gdp.csv",skip = 4)
colnames(extraction_gdp) <- c("fips","polyname","extraction_gdp")
extraction_gdp$extraction_gdp <- as.numeric(extraction_gdp$extraction_gdp)
extraction_gdp$fips <- as.numeric(extraction_gdp$fips)

# For counties that have been combined with other counties, apportion extraction GDP by population
idx <- which(setbacks$fips %in% extraction_gdp$fips==F)

while(length(idx)>0){
  i <- idx[1]
  # Name of missing county
  name <- str_split(setbacks$polyname[i],pattern = ",",simplify = T)[,2]
  # Index of combined county entry
  match <- grep(tolower(extraction_gdp$polyname),pattern = name)
  if(length(match)>1){
    match <- match[str_extract(extraction_gdp$fips[match],pattern = "^[0-9][0-9]")==str_extract(setbacks$fips[i],pattern = "^[0-9][0-9]")]
  }
  if(length(match)>1){
    match <- match[which(nchar(extraction_gdp$fips[match])==nchar(setbacks$fips[i]))]
  }
  if(length(match)>1){
    if(name=="roanoke"){
      # For Roanoke, VA, combine entries for Roanoke and Salem under FIPS for roanoke
      extraction_gdp$extraction_gdp[match[1]] <- as.character(sum(as.numeric(extraction_gdp$extraction_gdp[match])))
      extraction_gdp$fips[match[1]] <- setbacks$fips[i]
      extraction_gdp <- extraction_gdp[-match[-1],]
      # Also combine population for Roanoke and Salem
      setbacks$population[i] <- sum(population$population[which(population$fips %in% county$fips[which(county$STATEFP==51 & county$NAME %in% c("Roanoke","Salem"))])])
      # Recalculate population density
      setbacks$population_density[i] <- setbacks$population[i]/setbacks$land_area[i]
    } else if (name == "frederick"){
      # For Frederick, VA, assign its FIPS to the FIPS for the whole county
      extraction_gdp$fips[match[1]] <- setbacks$fips[i]
      # Also combine population for Frederick and Winchester
      setbacks$population[i] <- sum(population$population[which(population$fips %in% county$fips[which(county$STATEFP==51 & county$NAME %in% c("Frederick","Winchester"))])])
      # Recalculate population density
      setbacks$population_density[i] <- setbacks$population[i]/setbacks$land_area[i]
    } else {
      print(paste0("error ",i))
    }
  } else {
    # Adjust names
    if(str_extract(extraction_gdp$fips[match],pattern = "^[0-9][0-9]")==str_extract(setbacks$fips[i],pattern = "^[0-9][0-9]")){
      name <- str_split(extraction_gdp$polyname[match],pattern = ",",simplify = T)[,1]
      name <- tolower(unlist(str_split(name,pattern = " [+] ")))
      name <- unique(paste0(setbacks$state[i],",",name))
    }
    name <- name[which(name %in% setbacks$polyname)]
    
    # Apportion extraction GDP by population
    temp <- extraction_gdp[1:length(name),]
    temp$fips <- setbacks$fips[which(setbacks$polyname %in% name)]
    temp$polyname <- NA
    pops <- setbacks[which(setbacks$polyname %in% name),c('fips','population')]
    pops <- pops %>% arrange(fips)
    temp <- temp %>% arrange(fips)
    if(length(extraction_gdp$extraction_gdp[match])>1){
      temp$extraction_gdp <- as.numeric(extraction_gdp$extraction_gdp[match])*pops$population/sum(pops$population)
      temp$extraction_gdp <- as.character(temp$extraction_gdp)
    } else {
      temp$extraction_gdp <- NA
    }
    # Replace combined county entry with separate rows
    fips_to_remove = c(extraction_gdp$fips[match],temp$fips)
    extraction_gdp <- bind_rows(extraction_gdp %>% filter(fips %in% fips_to_remove==F),temp)
  }
  idx <- which(setbacks$fips %in% extraction_gdp$fips==F)
}

extraction_gdp$extraction_gdp <- log(as.numeric(extraction_gdp$extraction_gdp)+1)
extraction_gdp <- extraction_gdp[,-2]

setbacks <- left_join(setbacks,extraction_gdp)

## 5.4 Percentage of votes for the Republican presidential candidate, 2000-2016 average -------------------------------------
# [County Republican presidential vote, % 2000-2016 average]
republican_vote <- read_csv("Election Results/countypres_2000-2016.csv")

republican_vote <- republican_vote %>% 
  group_by(FIPS) %>%
  summarize(republican_vote = sum(candidatevotes[which(party=="republican")])/sum(candidatevotes,na.rm = T))

colnames(republican_vote)[1] <- "fips"

# Add Kalawao, HI
kalawao_votes <- tibble(year = seq(2000,2016,by=4), republican_votes = c(11, 14, 6, 2, 1), total_votes = c(45, 40, 31, 27, 20))
kalawao_votes <- tibble(fips = 15005, republican_vote = sum(kalawao_votes$republican_votes)/sum(kalawao_votes$total_votes))

republican_vote <- bind_rows(republican_vote, kalawao_votes)

setbacks <- left_join(setbacks, republican_vote)

## 5.5 Transmission distance -------------------------------------
# [logged km from county centroid]
elines <- st_read("Transmission Lines","Transmission_Lines")
elines <- st_transform(elines,st_crs(setbacks))

# Subset for only high-voltage lines
elines <- elines %>% filter(VOLT_CLASS %in% c("220-287",345, 500, "735 and Above"))

# Calculate distances from county centroids to nearest high-voltage lines
dists <- st_distance(st_centroid(setbacks), elines[st_nearest_feature(st_centroid(setbacks),elines),],by_element = T)

setbacks$transmission_distance <- log(drop_units(set_units(dists,"km")))

## 5.6 Local government jobs -------------------------------------
# [logged number of jobs in local government]
local_government_jobs <- read_csv("County Government/bea_2019_localgovernmentemployment.csv",skip = 4)
colnames(local_government_jobs) <- c("fips","polyname","local_government_jobs")
local_government_jobs$local_government_jobs <- as.numeric(local_government_jobs$local_government_jobs)
local_government_jobs$fips <- as.numeric(local_government_jobs$fips)

# For counties that have been combined with other counties, apportion jobs by population
idx <- which(setbacks$fips %in% local_government_jobs$fips==F)

while(length(idx)>0){
  i <- idx[1]
  # Name of missing county
  name <- str_split(setbacks$polyname[i],pattern = ",",simplify = T)[,2]
  # Index of combined county entry
  match <- grep(tolower(local_government_jobs$polyname),pattern = name)
  if(length(match)>1){
    match <- match[str_extract(local_government_jobs$fips[match],pattern = "^[0-9][0-9]")==str_extract(setbacks$fips[i],pattern = "^[0-9][0-9]")]
  }
  if(length(match)>1){
    match <- match[which(nchar(local_government_jobs$fips[match])==nchar(setbacks$fips[i]))]
  }
  if(length(match)>1){
    if(name=="roanoke"){
      # For Roanoke, VA, combine entries for Roanoke and Salem under FIPS for roanoke
      local_government_jobs$local_government_jobs[match[1]] <- as.character(sum(as.numeric(local_government_jobs$local_government_jobs[match])))
      local_government_jobs$fips[match[1]] <- setbacks$fips[i]
      local_government_jobs <- local_government_jobs[-match[-1],]
      # Also combine population for Roanoke and Salem
      setbacks$population[i] <- sum(population$population[which(population$fips %in% county$fips[which(county$STATEFP==51 & county$NAME %in% c("Roanoke","Salem"))])])
      # Recalculate population density
      setbacks$population_density[i] <- setbacks$population[i]/setbacks$land_area[i]
    } else if (name == "frederick"){
      # For Frederick, VA, assign its FIPS to the FIPS for the whole county
      local_government_jobs$fips[match[1]] <- setbacks$fips[i]
      # Also combine population for Frederick and Winchester
      setbacks$population[i] <- sum(population$population[which(population$fips %in% county$fips[which(county$STATEFP==51 & county$NAME %in% c("Frederick","Winchester"))])])
      # Recalculate population density
      setbacks$population_density[i] <- setbacks$population[i]/setbacks$land_area[i]
    } else {
      print(paste0("error ",i))
    }
  } else {
    # Adjust names
    if(str_extract(local_government_jobs$fips[match],pattern = "^[0-9][0-9]")==str_extract(setbacks$fips[i],pattern = "^[0-9][0-9]")){
      name <- str_split(local_government_jobs$polyname[match],pattern = ",",simplify = T)[,1]
      name <- tolower(unlist(str_split(name,pattern = " [+] ")))
      name <- unique(paste0(setbacks$state[i],",",name))
    }
    name <- name[which(name %in% setbacks$polyname)]
    
    # Apportion job by population
    temp <- local_government_jobs[1:length(name),]
    temp$fips <- setbacks$fips[which(setbacks$polyname %in% name)]
    temp$polyname <- NA
    pops <- setbacks[which(setbacks$polyname %in% name),c('fips','population')]
    pops <- pops %>% arrange(fips)
    temp <- temp %>% arrange(fips)
    if(length(local_government_jobs$local_government_jobs[match])>1){
      temp$local_government_jobs <- as.numeric(local_government_jobs$local_government_jobs[match])*pops$population/sum(pops$population)
      temp$local_government_jobs <- as.character(temp$local_government_jobs)
    } else {
      temp$local_government_jobs <- NA
    }
    # Replace combined county entry with separate rows
    fips_to_remove = c(local_government_jobs$fips[match],temp$fips)
    local_government_jobs <- bind_rows(local_government_jobs %>% filter(fips %in% fips_to_remove==F),temp)
  }
  idx <- which(setbacks$fips %in% local_government_jobs$fips==F)
}

local_government_jobs$local_government_jobs <- log(as.numeric(local_government_jobs$local_government_jobs)+1)
local_government_jobs <- local_government_jobs[,-2]

setbacks <- left_join(setbacks,local_government_jobs)

# 6. State indicator -------------------------------------
# Create state fixed effects vector that combines all southeastern states
setbacks$state_fe <- ifelse(setbacks$state %in% c("arkansas","louisiana","alabama","mississippi","georgia","florida"),"southeast",setbacks$state)

# 7. Scale and save-------------------------------------
setbacks <- setbacks %>%
  select(1:4, state_fe, land_area, population, 5:16, 18:33, 35:41)

# Save plot data with geometry data
save(setbacks, file = "../ROPR Replication Materials/plot_data_3Aug2021")

# Remove geometry data
setbacks <- as.data.frame(setbacks) %>% select(-geometry)

# Center and scale continuous variables
setbacks <- setbacks %>%
  mutate(population_density = scale(population_density),
         county_gdp = scale(county_gdp),
         extraction_gdp = scale(extraction_gdp),
         republican_vote = scale(republican_vote),
         transmission_distance = scale(transmission_distance),
         local_government_jobs = scale(local_government_jobs),
         nb_setback_existence_p = scale(nb_setback_existence_p),
         nb_wind_farm_capacity = scale(nb_wind_farm_capacity),
         nb_wind_farm_count = scale(nb_wind_farm_count))


write_dta(setbacks, path = "../ROPR Replication Materials/analysis_data_3Aug2021.dta")
